import os
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly as py
import chart_studio
import plotly.offline
import plotly.graph_objs as go
import cufflinks as cf
from plotly.graph_objs import *
# Notebook-wide display/runtime setup. These calls act purely through global
# side effects and should run once, before any plotting cell.

# IPython magic (not plain Python): render matplotlib figures inline in the
# notebook output area.
%matplotlib inline
# Initialise plotly's offline notebook mode.
# NOTE(review): per the plotly docs, connected=True loads plotly.js from a CDN
# instead of embedding it, so it presumably needs network access -- confirm.
plotly.offline.init_notebook_mode(connected=True)
# Switch cufflinks to offline mode as well.
cf.go_offline()
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from pandas/seaborn.
warnings.filterwarnings('ignore')
# Load the airline member dataset into a DataFrame.
#
# The previous call used encoding='ANSI'. In Python that name is only a
# Windows alias for the locale-dependent 'mbcs' codec: it raises LookupError
# on Linux/macOS and decodes with whatever the machine's ANSI code page is on
# Windows. The file contains Chinese text, so it was presumably saved as GBK
# on a Chinese-locale Windows machine (TODO confirm against the raw file).
# Naming the codec explicitly makes the cell portable; GB18030 is used because
# it is a backward-compatible superset of GBK.
data = pd.read_csv('air_data.csv', encoding='gb18030')
print(data.shape)  # (row count, column count)
data.head()  # last expression of the cell: shows the first five rows
(62988, 44)
| MEMBER_NO | FFP_DATE | FIRST_FLIGHT_DATE | GENDER | FFP_TIER | WORK_CITY | WORK_PROVINCE | WORK_COUNTRY | AGE | LOAD_TIME | ... | ADD_Point_SUM | Eli_Add_Point_Sum | L1Y_ELi_Add_Points | Points_Sum | L1Y_Points_Sum | Ration_L1Y_Flight_Count | Ration_P1Y_Flight_Count | Ration_P1Y_BPS | Ration_L1Y_BPS | Point_NotFlight | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 54993 | 2006/11/2 | 2008/12/24 | 男 | 6 | . | 北京 | CN | 31.0 | 2014/3/31 | ... | 39992 | 114452 | 111100 | 619760 | 370211 | 0.509524 | 0.490476 | 0.487221 | 0.512777 | 50 |
| 1 | 28065 | 2007/2/19 | 2007/8/3 | 男 | 6 | NaN | 北京 | CN | 42.0 | 2014/3/31 | ... | 12000 | 53288 | 53288 | 415768 | 238410 | 0.514286 | 0.485714 | 0.489289 | 0.510708 | 33 |
| 2 | 55106 | 2007/2/1 | 2007/8/30 | 男 | 6 | . | 北京 | CN | 40.0 | 2014/3/31 | ... | 15491 | 55202 | 51711 | 406361 | 233798 | 0.518519 | 0.481481 | 0.481467 | 0.518530 | 26 |
| 3 | 21189 | 2008/8/22 | 2008/8/23 | 男 | 5 | Los Angeles | CA | US | 64.0 | 2014/3/31 | ... | 0 | 34890 | 34890 | 372204 | 186100 | 0.434783 | 0.565217 | 0.551722 | 0.448275 | 12 |
| 4 | 39546 | 2009/4/10 | 2009/4/15 | 男 | 6 | 贵阳 | 贵州 | CN | 48.0 | 2014/3/31 | ... | 22704 | 64969 | 64969 | 338813 | 210365 | 0.532895 | 0.467105 | 0.469054 | 0.530943 | 39 |
5 rows × 44 columns
导入 CSV 数据集,并以 ANSI 编码(在中文 Windows 系统下即 GBK)打开。
一、在接下来的步骤中,将对数据进行初步观察,尝试分析某些属性之间是否存在关联,同时观察 LRFMC 模型所需要用到的属性。
# Correlation heatmap across the numeric columns of `data`.
#
# The frame also holds string/date columns (e.g. GENDER, FFP_DATE). Calling
# DataFrame.corr() on the whole frame raises ValueError on pandas >= 2.0,
# where non-numeric columns are no longer dropped silently. Selecting the
# numeric columns first works on every pandas version.
fig, ax = plt.subplots(figsize=(14, 14))
numeric_corr = data.select_dtypes(include='number').corr()
# Pass the Axes explicitly instead of relying on matplotlib's implicit
# "current axes" state; annot=True writes each coefficient into its cell.
sns.heatmap(numeric_corr, annot=True, ax=ax)
<AxesSubplot:>
使用热力图(heatmap)大致查看数据各列之间是否存在显著的相关性,但未发现明显的规律。
# Draw seaborn's pairwise-relationship grid for `data`.
# NOTE(review): the frame has 62988 rows and 44 columns per the earlier cell
# output, so this grid is likely slow to render -- confirm it is still wanted.
sns.pairplot(data)
<seaborn.axisgrid.PairGrid at 0x1c9ce562f50>